# -*- coding:utf-8 -*-
# @Email   : lishuaichao@lingxi.ai
# @Time    : 2023/11/21 19:42

import re

import pandas as pd


# Matches one "@#<prefix>||<text>#@" segment and captures <text>.
_BOT_SEGMENT_RE = re.compile(r'@#.*?\|\|(.*?)#@')
# Matches a square-bracketed span such as "[...]" (non-greedy, no newlines).
_BRACKET_SPAN_RE = re.compile(r'\[.*?\]')


def format_record(data):
    """Return the filtered text for one dialogue row.

    Args:
        data: Mapping for a single row. ``speaker_type`` is always read;
            ``msg_content`` is read for 'USER' rows and ``bot_words_text``
            for 'IVR' rows.

    Returns:
        str: For 'USER' rows, the message content; for 'IVR' rows, every
        text captured from ``@#...||<text>#@`` segments joined with '。'.
        In both cases square-bracketed spans are removed and the
        ``@@quiet@@`` marker is replaced with '(用户没说话)'. Rows with any
        other speaker type, or USER rows whose content is missing, yield
        an empty string.

    Raises:
        KeyError: If a column required for the row's speaker type is absent.
    """
    speaker = data['speaker_type']
    if speaker == 'USER':
        raw = data['msg_content']
        # Empty Excel cells arrive as NaN and numeric cells as int/float;
        # both would make the regex substitution below raise TypeError.
        text = '' if pd.isna(raw) else str(raw)
    elif speaker == 'IVR':
        text = '。'.join(_BOT_SEGMENT_RE.findall(str(data['bot_words_text'])))
    else:
        # Unknown speaker types previously crashed with KeyError because no
        # text was ever assigned; emit an empty value instead.
        text = ''
    text = _BRACKET_SPAN_RE.sub('', text)
    return text.replace('@@quiet@@', '(用户没说话)')


def execute(source_file, target_file):
    """Read a dialogue workbook, add a '过滤文本' column, and save the result.

    Rows are processed and written in the order they appear in the source
    file; no sorting is applied.

    Args:
        source_file: Path of the Excel file to read.
        target_file: Path of the Excel file to write (without the index).
    """
    df_faq = pd.read_excel(source_file)
    datas = df_faq.to_dict('records')
    for data in datas:
        data['过滤文本'] = format_record(data)

    pd.DataFrame(datas).to_excel(target_file, index=False)


# Script entry: convert the workbook in the current working directory.
# NOTE: there is no __main__ guard, so this also runs when the module is imported.
execute(
    source_file='慧择对话文本.xlsx',
    target_file='慧择对话文本-format.xlsx',
)